# -*- coding: utf-8 -*-
import copy
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from plap.rules import *
from plap.utils.login import SeleniumLogin
from plap.utils.done_filter import SpuPageLogFilter, MaterialHelper
from zc_core.spiders.base import BaseSpider


class SpuSpider(BaseSpider):
    """Spider that walks the SPU list pages of mall.plap.cn catalog by catalog.

    Flow implemented by the methods of this class: the index page yields the
    supplier and catalog boxes, every level-2 catalog gets its SPU list pages
    requested, and SPUs for which no material code is known get an extra
    request to their commodity page.
    """
    name = 'spu'
    # Per-spider concurrency overrides (presumably consumed by Scrapy through
    # the BaseSpider hierarchy -- confirm against zc_core.spiders.base).
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }
    # Frequently used URLs
    # Index page: parsed for suppliers and catalogs.
    index_url = 'http://mall.plap.cn/oc'
    # SPU list page; placeholders are (page number, catalog id).
    spu_list_url = 'http://mall.plap.cn/npc/products.html?page={}&q%5Bcatalog_id_eq%5D={}&q%5Bs%5D=id+desc'
    # Commodity detail page; the placeholder is the SPU id.
    commodity_url = 'http://mall.plap.cn/commodities/{}'

    def __init__(self, batchNo=None, delta=24 * 2, *args, **kwargs):
        """Initialise the spider and the helpers it uses while crawling.

        :param batchNo: forwarded unchanged to ``BaseSpider.__init__``.
        :param delta: forwarded unchanged to ``BaseSpider.__init__``
            (defaults to ``24 * 2``).
        :param args: extra positional arguments for the base class.
        :param kwargs: extra keyword arguments for the base class.
        """
        super(SpuSpider, self).__init__(
            *args, batchNo=batchNo, delta=delta, **kwargs
        )
        # self.batch_no is read right after the base initialiser runs
        # (presumably set there -- confirm in BaseSpider).
        current_batch = self.batch_no
        # Avoid collecting the same list page twice
        self.done_filter = SpuPageLogFilter(current_batch)
        # Completes missing material codes
        self.material_helper = MaterialHelper()

    def start_requests(self):
        """Obtain login cookies and schedule the index-page request.

        Cookies come from ``SeleniumLogin().get_cookies(self.name)``. When
        that returns a falsy value an error is logged and nothing is
        scheduled; otherwise a single request for ``index_url`` is yielded
        with the cookies attached and ``parse_index`` as its callback.
        """
        session_cookies = SeleniumLogin().get_cookies(self.name)
        if session_cookies:
            # NOTE(review): this writes the raw cookies to the log; consider
            # masking them if they contain session credentials.
            self.logger.info('init cookie: %s', session_cookies)

            # Catalogs and brands
            yield Request(
                self.index_url,
                callback=self.parse_index,
                errback=self.error_back,
                cookies=session_cookies,
                meta={'batchNo': self.batch_no},
                priority=200,
                dont_filter=True,
            )
        else:
            self.logger.error('init cookie failed...')

    # Handle the index page: suppliers and the catalog list
    def parse_index(self, response):
        """Extract suppliers and catalogs from the index page.

        Yields a ``Box('supplier', ...)`` when ``parse_supplier`` finds
        anything and a ``Box('catalog', ...)`` when ``parse_catalog`` does.
        For every catalog whose ``level`` equals 2, the first SPU list page
        is requested with ``parse_spu_page`` as callback.

        :param response: response of the index-page request.
        """
        # Suppliers
        supplier_list = parse_supplier(response)
        if supplier_list:
            self.logger.info('供应商: count[%s]', len(supplier_list))
            yield Box('supplier', self.batch_no, supplier_list)

        # Catalogs
        catalog_list = parse_catalog(response)
        if not catalog_list:
            return

        self.logger.info('品类: count[%s]', len(catalog_list))
        yield Box('catalog', self.batch_no, catalog_list)

        first_page = 1
        for catalog in catalog_list:
            catalog_id = catalog.get('catalogId')
            # Only level-2 catalogs get their SPU list crawled.
            if catalog.get('level') != 2:
                continue

            # First page of the SPU list
            request_meta = {
                'reqType': 'spu',
                'batchNo': self.batch_no,
                'catId': catalog_id,
                'page': first_page,
            }
            yield Request(
                self.spu_list_url.format(first_page, catalog_id),
                callback=self.parse_spu_page,
                errback=self.error_back,
                meta=request_meta,
                priority=100,
                dont_filter=True,
            )

    # Handle the first SPU list page of a catalog and schedule the remaining pages
    def parse_spu_page(self, response):
        """Handle the first SPU list page of a catalog and fan out to the rest.

        The page itself is processed by ``self.parse_spu`` -- the same handler
        that is registered as callback for pages 2..N -- instead of keeping a
        second copy of that logic here. Afterwards the total page count is
        read from the response and one request per remaining page is
        scheduled, skipping pages that ``self.done_filter`` already contains.

        :param response: response of a list-page request; ``response.meta``
            carries ``catId`` (and ``page``, which ``parse_spu`` reads).
        :return: generator yielding everything ``parse_spu`` yields for this
            page, followed by the requests for the remaining pages.
        """
        cat_id = response.meta.get('catId')

        # Page 1 is an ordinary list page: reuse the per-page handler so both
        # code paths stay in sync (explicit loop rather than ``yield from``
        # to match the Python-2-compatible style used in this file).
        for result in self.parse_spu(response):
            yield result

        # The bare name below resolves to the module-level helper (presumably
        # provided by the ``plap.rules`` star import), not to this method.
        total_page = parse_spu_page(response)
        # Truthiness check first so a ``None`` result cannot raise a
        # TypeError in the comparison.
        if total_page and total_page > 0:
            self.logger.info('总页数: cat=%s, total=%s', cat_id, total_page)
            for page in range(2, total_page + 1):
                # Skip pages the done-filter already knows about.
                if self.done_filter.contains(cat_id, page):
                    self.logger.info('已采: cat=%s, page=%s', cat_id, page)
                    continue

                yield Request(
                    url=self.spu_list_url.format(page, cat_id),
                    callback=self.parse_spu,
                    errback=self.error_back,
                    meta={
                        'reqType': 'spu',
                        'batchNo': self.batch_no,
                        'catId': cat_id,
                        'page': page,
                    },
                    priority=100,
                    dont_filter=True,
                )

    # Handle a single SPU list page
    def parse_spu(self, response):
        """Handle a single SPU list page.

        The SPUs parsed from the page are passed to
        ``self.material_helper.fill``, which returns two lists: ``ok_list``
        (treated here as SPUs that already have a materialCode) and
        ``todo_list`` (SPUs for which the commodity page is requested so the
        code can be looked up in ``parse_commodity``).

        Yields, in order: the page log built by ``build_spu_log``, one
        ``Box('spu', ...)`` containing ``ok_list`` when it is non-empty, and
        one commodity request per entry of ``todo_list``. An empty page only
        produces a log line.

        :param response: response of a list-page request; ``response.meta``
            carries ``catId`` and ``page``.
        """
        meta = response.meta
        cur_page = meta.get('page')
        cat_id = meta.get('catId')

        # Bare name: the module-level parser, not this method.
        spu_list = parse_spu(response)
        if not spu_list:
            self.logger.info('分页为空: cat=%s, page=%s', cat_id, cur_page)
            return

        ok_list, todo_list = self.material_helper.fill(spu_list)
        self.logger.info(
            '清单: cat=%s, page=%s, total=%s, ok=%s, todo=%s',
            cat_id, cur_page, len(spu_list), len(ok_list), len(todo_list))
        yield build_spu_log(cat_id, cur_page, len(spu_list), self.batch_no)

        # SPUs that already have a materialCode: emit only those. Yielding
        # the whole spu_list here would also emit the todo entries, which are
        # emitted on their own by parse_commodity once their code is known.
        if ok_list:
            yield Box('spu', self.batch_no, ok_list)

        # SPUs without a materialCode: fetch the commodity page for each.
        for todo in todo_list:
            spu_id = todo.get('spuId')
            yield Request(
                url=self.commodity_url.format(spu_id),
                meta={
                    'reqType': 'group',
                    'batchNo': self.batch_no,
                    'spuId': spu_id,
                    # Shallow copy so the callback's mutation does not touch
                    # the entry held in todo_list.
                    'spu': copy.copy(todo),
                },
                callback=self.parse_commodity,
                errback=self.error_back,
                priority=200,
            )

    # Handle a commodity page: look up the material code of one SPU
    def parse_commodity(self, response):
        """Attach the material code found on a commodity page to its SPU.

        Reads ``spu`` and ``spuId`` from ``response.meta``. When
        ``parse_material_code`` returns a truthy code and an SPU was passed
        along, the code is stored under ``materialCode`` and the SPU is
        yielded; otherwise only a log line is written.

        :param response: response of a commodity-page request.
        """
        request_meta = response.meta
        spu_item = request_meta.get('spu')
        spu_key = request_meta.get('spuId')
        code = parse_material_code(response)

        if not (code and spu_item):
            self.logger.info('无物料: spu=%s', spu_key)
            return

        spu_item['materialCode'] = code
        self.logger.info('物料: spu=%s, mtc=%s', spu_key, code)
        yield spu_item

    # Error handling

